#install.packages('ggmap')
#install.packages("dplyr")
#install.packages("plyr")
#install.packages("leaflet")

library(ggmap)
## Loading required package: ggplot2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.4.4

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(dplyr)
library(leaflet)
library(ggthemes)
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise
## The following object is masked from 'package:ggmap':
## 
##     wind
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(leaflet)
library(readr)
library(RColorBrewer)
library(geosphere)
library(geojsonio)
## Warning: package 'geojsonio' was built under R version 3.4.4
## 
## Attaching package: 'geojsonio'
## The following object is masked from 'package:geosphere':
## 
##     centroid
## The following object is masked from 'package:base':
## 
##     pretty
library(sp)
library(htmltools)
## 
## Attaching package: 'htmltools'
## The following object is masked from 'package:geosphere':
## 
##     span
library("ggmap")
library(dplyr)
library(googleway)
## 
## Attaching package: 'googleway'
## The following objects are masked from 'package:plotly':
## 
##     add_heatmap, add_markers, add_polygons
library(leaflet)


#Automate for entire dataset

# Replace missing values with the column mean.
#
# Only numeric and logical columns are imputed; every other column (factor,
# character, ...) is left untouched. Calling mean() on those columns only
# produced "argument is not numeric or logical" warnings and an NA fill value.
#
# Args:
#   data: a data.frame.
# Returns:
#   `data` with the NAs of each numeric/logical column replaced by that
#   column's mean (computed with na.rm = TRUE). A column that is entirely NA
#   is filled with NaN, because that is what mean() returns for it.
data_replace_mean <- function(data) {
  # seq_len() keeps the loop empty for a zero-column frame (1:0 would not).
  for (i in seq_len(ncol(data))) {
    column <- data[[i]]
    if (is.numeric(column) || is.logical(column)) {
      data[is.na(column), i] <- mean(column, na.rm = TRUE)
    }
  }
  data
}

# Columns of the raw listings file that the analysis keeps, in output order.
listings_col_to_keep <- c(
  # identity and host
  "id", "name", "host_id", "host_name", "host_location",
  # location
  "neighbourhood_cleansed", "neighbourhood_group_cleansed", "city", "state",
  "smart_location", "latitude", "longitude",
  # property
  "property_type", "room_type", "accommodates", "bathrooms", "bedrooms",
  "beds",
  # pricing
  "price", "weekly_price", "monthly_price", "security_deposit",
  "cleaning_fee",
  # reviews
  "review_scores_rating", "review_scores_accuracy",
  "review_scores_cleanliness", "review_scores_checkin",
  "review_scores_communication", "review_scores_location",
  "review_scores_value", "reviews_per_month",
  # host activity and availability
  "host_listings_count", "host_total_listings_count",
  "calculated_host_listings_count", "availability_365", "number_of_reviews"
)

# Keep only the columns named in `listings_col_to_keep` and pass the result
# through data_replace_mean().
#
# Args:
#   df: raw listings data.frame containing every column in
#       `listings_col_to_keep`.
# Returns:
#   The reduced data.frame as returned by data_replace_mean().
df_subset_columns <- function(df) {
  kept_columns <- df[, listings_col_to_keep, drop = FALSE]
  data_replace_mean(kept_columns)
}

# Label every listing as "Commercial Listing" or "Household Listing".
#
# A listing is labelled commercial when all three conditions hold:
#   * calculated_host_listings_count > 1
#   * availability_365 / 365 < 0.3
#   * review_scores_rating > 50
# Every other listing is labelled household.
#
# The labelling is vectorised. The previous row-by-row loop over 1:nrow()
# failed on an empty data.frame and performed one data.frame assignment per
# row. A row whose condition cannot be decided (NA) now gets an NA label
# instead of aborting the whole run.
#
# Args:
#   df: raw listings data.frame containing every column in
#       `listings_col_to_keep`.
# Returns:
#   A data.frame with the columns `id` and `is_commercial` (character).
clean_data <- function(df) {
  df_subset <- subset(df, select = listings_col_to_keep)
  df_subset <- data_replace_mean(df_subset)

  multi_listing_host <- df_subset[["calculated_host_listings_count"]] > 1
  rarely_available <- df_subset[["availability_365"]] / 365 < 0.3
  well_rated <- df_subset[["review_scores_rating"]] > 50
  commercial <- multi_listing_host & rarely_available & well_rated

  # as.character() keeps the column character even for zero rows, where
  # ifelse() would return a logical vector.
  df_subset$is_commercial <- as.character(
    ifelse(commercial, "Commercial Listing", "Household Listing")
  )

  subset(df_subset, select = c("id", "is_commercial"))
}

# Load the March 2018 listings export and classify each listing.
listings_path_18 <- "/Users/ranjitarajeevashetty/Downloads/March_2018/listings_all.csv"
listings_file_18 <- read.csv(file = listings_path_18)

listing_18_commercial <- clean_data(df = listings_file_18)
## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA
## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA
#combined_commercial <- rbind(listing_1_commercial,listing_2_commercial)

# Build one row per listing id with its final label. An id that is labelled
# commercial anywhere is kept as commercial and removed from the household set.
commercial_label <- "Commercial Listing"
household_label <- "Household Listing"

all_commercial_18 <- listing_18_commercial %>%
  filter(is_commercial == commercial_label)
all_non_commercial_18 <- listing_18_commercial %>%
  filter(is_commercial == household_label)

commercial_ids_18 <- unique(all_commercial_18$id)
household_ids_18 <- unique(all_non_commercial_18$id)
household_ids_18 <- household_ids_18[!(household_ids_18 %in% commercial_ids_18)]

unique_all_commercial_18 <- data.frame(
  id = commercial_ids_18,
  is_commercial = commercial_label,
  stringsAsFactors = FALSE
)
unique_all_non_commercial_18 <- data.frame(
  id = household_ids_18,
  is_commercial = household_label,
  stringsAsFactors = FALSE
)

# Commercial rows first, then household rows.
combined_unique_listings_18 <- rbind(
  unique_all_commercial_18,
  unique_all_non_commercial_18
)
#end

# Reduced, mean-imputed copy of the 2018 listings used for all later analysis.
listings_updated_18 <- df_subset_columns(df = listings_file_18)
## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA

## Warning in mean.default(data[, i], na.rm = TRUE): argument is not numeric
## or logical: returning NA
# Left join: keep every listing and attach its commercial/household label.
total_listing <- merge(
  x = listings_updated_18,
  y = combined_unique_listings_18,
  by = "id",
  all.x = TRUE
)

#sapply(var1, tolower), converting to lower

# Names of the tourist places that are geocoded and matched against listings.
#
# Three fixes compared with the previous list:
#   * "Four Freedoms Park" no longer carries a trailing space, so exact-name
#     lookups against it (the manual coordinate overrides) can match;
#   * the empty-string entry is removed (it cannot be geocoded);
#   * the duplicated "New York Botanical Garden" entry is removed.
tourist_places_list <- c(
  "bryant park",
  "one world observatory",
  "The Tour",
  "park slope",
  "Columbus Circle",
  "Rockefeller Center",
  "United Nations Headquarters",
  "Ellis Island",
  "barclays center",
  "NYC Information Center",
  "The museum of modern art",
  "Elegant Tightwad Shopping Tours",
  "Statue of Liberty",
  "Museum of the Moving Image",
  "New York Botanical Garden",
  "Lincoln Center",
  "Bronx Opera House Hotel",
  "theater for a new audience",
  "Tiffany & Co. Foundation Gallery",
  "Chinese Scholars Garden",
  "Museum of Sex",
  "City Reliquary",
  "Staten Island Ferry",
  "MCC Theater",
  "Century 21",
  "Socrates Sculpture Park",
  "Wave Hill House",
  "Brooklyn History and Pizza Tour",
  "LIC Flea",
  "Yankee Stadium",
  "Brandy's Piano Bar",
  "Tenement Museum",
  "african burial grounds",
  "City Winery",
  "the lakeside restaurant",
  "Arthur Avenue",
  "Flushing Meadows-Corona Park",
  "City Island",
  "Trinity Church",
  "Citi Field",
  "Brooklyn Brewery",
  "coney island",
  "FAO Schwarz",
  "230 Fifth",
  "The Queens Museum",
  "La Marina",
  "Brooklyn Bridge",
  "Tango House",
  "brooklyn historical society",
  "Gotham West Market",
  "The Metropolitan Museum of Art",
  "Brooklyn Museum",
  "Empire State Building",
  "The Guggenheim Museum",
  "Refinery Hotel",
  "New York Public Library",
  "Jacob K. Javits Center",
  "The New Museum",
  "Museum of Mathematics",
  "Four Freedoms Park",
  "museum of natural history",
  "The Staten Island Zoo",
  "Green-wood Cemetery"
)

#tourist_places1<-sapply(tourist_places, tolower)

# One-column data.frame of place names; the column is named
# `tourist_places_list`.
tourist_places_list_df <- as.data.frame(tourist_places_list)

# Geocode every place name. Lookups can fail transiently (for example with an
# OVER_QUERY_LIMIT status), which leaves NA coordinates, so the rows that came
# back without a latitude are retried a bounded number of times with a short
# pause between rounds. Names that cannot be resolved at all simply stay NA.
places_to_geocode <- as.character(tourist_places_list_df$tourist_places_list)
geo_tourist_places_list <- geocode(places_to_geocode)

max_geocode_retries <- 3
for (attempt in seq_len(max_geocode_retries)) {
  failed_rows <- which(is.na(geo_tourist_places_list$lat))
  if (length(failed_rows) == 0) {
    break
  }
  Sys.sleep(1)
  retried <- geocode(places_to_geocode[failed_rows])
  geo_tourist_places_list$lon[failed_rows] <- retried$lon
  geo_tourist_places_list$lat[failed_rows] <- retried$lat
}
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=bryant%20park&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "bryant
## park"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=one%20world%20observatory&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=The%20Tour&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "The Tour"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=park%20slope&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "park
## slope"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Columbus%20Circle&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Columbus
## Circle"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Rockefeller%20Center&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=United%20Nations%20Headquarters&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "United
## Nations Headquarters"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Ellis%20Island&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=barclays%20center&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=NYC%20Information%20Center&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "NYC
## Information Center"
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=The%20museum%20of%20modern%20art&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "The
## museum of modern art"
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Elegant%20Tightwad%20Shopping%20Tours&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Elegant
## Tightwad Shopping Tours"
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Statue%20of%20Liberty&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Statue of
## Liberty"
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Museum%20of%20the%20Moving%20Image&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Museum of
## the Moving Image"
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=New%20York%20Botanical%20Garden&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "New York
## Botanical Garden"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=New%20York%20Botanical%20Garden&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Lincoln%20Center&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Bronx%20Opera%20House%20Hotel&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Bronx
## Opera House Hotel"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=theater%20for%20a%20new%20audience&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Tiffany%20&%20Co.%20Foundation%20Gallery&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Chinese%20Scholars%20Garden&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Museum%20of%20Sex&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Museum of
## Sex"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=City%20Reliquary&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Staten%20Island%20Ferry&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=MCC%20Theater&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Century%2021&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Century
## 21"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Socrates%20Sculpture%20Park&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Wave%20Hill%20House&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brooklyn%20History%20and%20Pizza%20Tour&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Brooklyn
## History and Pizza Tour"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=LIC%20Flea&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Yankee%20Stadium&sensor=false
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brandy's%20Piano%20Bar&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Tenement%20Museum&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=african%20burial%20grounds&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=City%20Winery&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "City
## Winery"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=the%20lakeside%20restaurant&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "the
## lakeside restaurant"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Arthur%20Avenue&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Arthur
## Avenue"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Flushing%20Meadows-Corona%20Park&sensor=false
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=City%20Island&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "City
## Island"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Trinity%20Church&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Citi%20Field&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Citi
## Field"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brooklyn%20Brewery&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=coney%20island&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=FAO%20Schwarz&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "FAO
## Schwarz"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=230%20Fifth&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "230
## Fifth"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=The%20Queens%20Museum&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "The
## Queens Museum"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=La%20Marina&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brooklyn%20Bridge&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Brooklyn
## Bridge"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Tango%20House&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Tango
## House"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=brooklyn%20historical%20society&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "brooklyn
## historical society"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Gotham%20West%20Market&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Gotham
## West Market"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=The%20Metropolitan%20Museum%20of%20Art&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brooklyn%20Museum&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Brooklyn
## Museum"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Empire%20State%20Building&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Empire
## State Building"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=The%20Guggenheim%20Museum&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "The
## Guggenheim Museum"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Refinery%20Hotel&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Refinery
## Hotel"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=New%20York%20Public%20Library&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "New York
## Public Library"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Jacob%20K.%20Javits%20Center&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Jacob K.
## Javits Center"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=The%20New%20Museum&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "The New
## Museum"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Museum%20of%20Mathematics&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Museum of
## Mathematics"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Four%20Freedoms%20Park%20&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Four
## Freedoms Park "
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=museum%20of%20natural%20history&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "museum of
## natural history"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=The%20Staten%20Island%20Zoo&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "The
## Staten Island Zoo"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Green-wood%20Cemetery&sensor=false
# Put each place name next to its geocoded lon/lat.
tourist_places_list_main <- cbind(
  tourist_places_list_df,
  geo_tourist_places_list
)

# Print the geocoding result for one known place as a spot check.
tourist_places_list_main %>%
  filter(tourist_places_list == "Columbus Circle")
##   tourist_places_list lon lat
## 1     Columbus Circle  NA  NA
# Hand-maintained coordinates that take precedence over the geocoder result.
#
# Fixes compared with the previous one-assignment-per-value version:
#   * Coney Island used Ellis Island's longitude (-74.0396); it is now -73.9707.
#   * Names are compared after trimming whitespace, so "Four Freedoms Park"
#     matches even when the source list carries a stray trailing space.
#   * Coordinates are written as numbers, so lat/lon stay numeric and missing
#     rows are dropped with is.na() rather than by comparing to the text "NA".
manual_coordinates <- data.frame(
  place = c(
    "Columbus Circle",
    "one world observatory",
    "Ellis Island",
    "coney island",
    "Brooklyn Museum",
    "The Metropolitan Museum of Art",
    "Four Freedoms Park"
  ),
  lat = c(40.76803, 40.7127, 40.6995, 40.5755, 40.6712, 40.7794, 40.7508),
  lon = c(-73.9832, -74.0134, -74.0396, -73.9707, -73.9636, -73.9632, -73.9604),
  stringsAsFactors = FALSE
)

place_names <- trimws(as.character(tourist_places_list_main$tourist_places_list))
override_row <- match(place_names, manual_coordinates$place)
has_override <- !is.na(override_row)

tourist_places_list_main$lat[has_override] <-
  manual_coordinates$lat[override_row[has_override]]
tourist_places_list_main$lon[has_override] <-
  manual_coordinates$lon[override_row[has_override]]

# Drop places that still have no coordinates.
tourist_places_list_main <- dplyr::filter(tourist_places_list_main, !is.na(lat))

tourist_places_list_main$lat <- as.numeric(tourist_places_list_main$lat)
tourist_places_list_main$lon <- as.numeric(tourist_places_list_main$lon)

tourist_places_list_main <- unique(tourist_places_list_main)

#importing march data file

# Parse "POINT (<lon> <lat>)" strings into numeric coordinates.
#
# Args:
#   the_geom: character (or factor) vector of point strings.
# Returns:
#   A data.frame with numeric columns `longitude` and `latitude`, one row per
#   input element; elements that cannot be parsed yield NA.
parse_point_coords <- function(the_geom) {
  inner <- sub("^\\s*POINT\\s*\\(\\s*", "", as.character(the_geom))
  inner <- sub("\\s*\\)\\s*$", "", inner)
  parts <- strsplit(inner, "\\s+")
  data.frame(
    longitude = vapply(parts, function(p) as.numeric(p[1]), numeric(1)),
    latitude = vapply(parts, function(p) as.numeric(p[2]), numeric(1))
  )
}

# Subway station locations.
stations_list <- read.csv('/Users/ranjitarajeevashetty/Documents/R-Visualization/R Project/subway.csv', header=TRUE)

# Both coordinates come from the same parser. Longitude was previously cut out
# with a fixed character window (positions 8-20), which truncated the value
# and depended on the exact width of the text.
station_coords <- parse_point_coords(stations_list$the_geom)
stations_list$longitude <- station_coords$longitude
latitude <- station_coords$latitude

stations_list_lat_long <- cbind(stations_list, latitude)



# Popup text for every listing marker.
# The full column names are required here: `total_listing$neighbourhood` is an
# ambiguous partial match (neighbourhood_cleansed vs
# neighbourhood_group_cleansed) and evaluates to NULL, so the popup showed no
# neighbourhood value.
content <- paste(
  "neighbourhood:", total_listing$neighbourhood_cleansed, "<br/>",
  "neighbourhood_group:", total_listing$neighbourhood_group_cleansed, "<br/>",
  "room_type:", total_listing$room_type, "<br/>",
  "Availability:", total_listing$availability_365, "<br/>"
)

library(RColorBrewer)
# One colour per listing type.
pal <- colorFactor("Set1", domain = total_listing$is_commercial)
color_offsel1 <- pal(total_listing$is_commercial)

# Icon used for the subway station markers.
NYU_Subway <- makeIcon(
  iconUrl = "https://images-na.ssl-images-amazon.com/images/I/41NPZ9vZKNL.png",
  iconWidth = 25,
  iconHeight = 25
)

# Clustered listing markers plus tourist places and subway stations as
# toggleable overlays. The default tiles are registered under the
# "OpenStreetMap" group so that the base-layer entry of the same name in the
# layers control refers to an existing layer.
mclust <- leaflet(total_listing) %>%
  addTiles(group = "OpenStreetMap") %>%
  addProviderTiles(providers$Stamen.Toner, group = "Toner") %>%
  addProviderTiles(providers$Stamen.TonerLite, group = "Toner Lite") %>%
  addCircleMarkers(
    color = color_offsel1,
    popup = content,
    clusterOptions = markerClusterOptions(),
    group = "Airbnb Listing"
  ) %>%
  addLegend(
    pal = pal,
    values = ~is_commercial,
    title = "colour by listing type"
  ) %>%
  addMarkers(
    data = tourist_places_list_main,
    group = "Tourist Places",
    popup = paste(
      "Tourist_place_name:",
      tourist_places_list_main$tourist_places_list,
      "<br>"
    )
  ) %>%
  addMarkers(
    data = stations_list_lat_long,
    group = "Subway Station",
    icon = NYU_Subway
  ) %>%
  addLayersControl(
    baseGroups = c("OpenStreetMap", "Toner", "Toner Lite"),
    overlayGroups = c("Airbnb Listing", "Tourist Places", "Subway Station"),
    options = layersControlOptions(collapsed = TRUE)
  ) %>%
  # Only one view is set; an earlier setView() at zoom 11 was always
  # overridden by this one.
  setView(-73.9949344, 40.7179112, zoom = 13)
## Assuming "longitude" and "latitude" are longitude and latitude, respectively
## Assuming "lon" and "lat" are longitude and latitude, respectively
## Assuming "longitude" and "latitude" are longitude and latitude, respectively
mclust
#filter by Borough 



# adding route between source and target

#mydf <- data.frame(region = 1:48852,
                   #from_lat = total_listing$longitude,
                   #from_long = total_listing$latitude,
                   #to_lat =total_listing$Nearest_tourist_long ,
                   #to_long = total_listing$Nearest_tourist_lat)

#mykey<- "AIzaSyAiercWjEbY3KeRto570WdpQUDcFTFgOns"

#lapply(1:nrow(mydf), function(x){

    #foo <- google_directions(origin = unlist(mydf[x, 2:3]),
                             #destination = unlist(mydf[x, 4:5]),
                             #key = mykey,
                             #mode = "driving",
                             #simplify = TRUE)

   # pl <- decode_pl(foo$routes$overview_polyline$points)

    #return(pl)
#}
   # ) %>%
#bind_rows(.id = "region") -> temp


#try Later
#polylines <- lapply(1:nrow(mydf), function(x){

  #foo <- google_directions(origin = unlist(mydf[x, 2:3]),
                           #destination = unlist(mydf[x, 4:5]),
                           #key = mykey,
                           #mode = "driving",
                           #simplify = TRUE)

  ## no need to decode the line, just return the string as-is
  #foo$routes$overview_polyline$points
#}
#)

#df <- data.frame(polylines = unlist(polylines), stringsAsFactors = F)

## add some colour values for the markers
#mydf$colour_from <- "red"
#mydf$colour_to <- "blue"

## plot the polylines and the markers
#google_map(key = mapKey) %>%
  #add_markers(data = mydf, lat = "from_lat", lon = "from_long", colour = "colour_from") %>%
  #add_markers(data = mydf, lat = "to_lat", lon = "to_long", colour = "colour_to") %>%
  #add_polylines(data = df, polyline = "polylines")


#unique(total_listing$room_type)
#install.packages("googleway")

# Distance (metres) from every listing (rows) to every tourist place (columns).
mat <- distm(
  total_listing[, c("longitude", "latitude")],
  tourist_places_list_main[, c("lon", "lat")],
  fun = distVincentyEllipsoid
)

# Column index of the nearest tourist place for each listing.
# It is computed once and with deterministic tie-breaking: max.col() breaks
# ties at random by default, so three separate calls could pick different
# places for the name, the longitude and the latitude of the same listing.
nearest_place_idx <- max.col(-mat, ties.method = "first")

total_listing$Tourist_place <-
  tourist_places_list_main$tourist_places_list[nearest_place_idx]
total_listing$Nearest_tourist_long <-
  tourist_places_list_main$lon[nearest_place_idx]
total_listing$Nearest_tourist_lat <-
  tourist_places_list_main$lat[nearest_place_idx]

# Smallest distance in each row of the matrix.
total_listing$Min_distance <- apply(mat, 1, min)


#closest 5

#head(order(mat[-1,1]),5)+1


#head(order(mat[1,1]),5)
#mat[1,3]
#mat2=t(apply(mat,1,sort))

#hex

#install.packages("hexbin")

#library(hexbin)

#x=rnorm(total_listing$price)
#y=rnorm(total_listing$Min_distance)
#bin<- hexbin(x,y,xbins = 10)
#plot(bin,main="asd")


#typeof(total_listing$price)
#unique(total_listing$Tourist_place)
#bar plot

# Number of listings per nearest tourist place and listing type.
# The dplyr verbs are namespaced because plyr and plotly are attached as well
# and also export `summarise`; which one an unqualified call reaches depends
# on the order in which the packages were attached.
tourist_places_8 <- total_listing %>%
  dplyr::group_by(Top_tourist_places = Tourist_place, is_commercial) %>%
  dplyr::summarise(count = dplyr::n())

# Tourist places shown in the charts.
top_places <- c(
  "barclays center",
  "Brooklyn Bridge",
  "coney island",
  "Empire State Building",
  "Columbus Circle",
  "Rockefeller Center",
  "one world observatory",
  "United Nations Headquarters"
)

top_places_df <- as.data.frame(top_places)

# Inner join: keeps only the rows whose place is listed in `top_places`.
top_places_filtrd <- merge(
  tourist_places_8,
  top_places_df,
  by.x = "Top_tourist_places",
  by.y = "top_places"
)

tourist_places_8$Top_tourist_places <- as.factor(tourist_places_8$Top_tourist_places)

names(top_places_filtrd)[2] <- "Type_of_Listing"

# Which tourist place has the most Airbnb listings nearest to it.
# Columns are referenced by bare name inside aes() (not as `df$col`), and the
# labels are dodged with `group`, since geom_text() has no `fill` aesthetic.
plot2 <- ggplot(
  top_places_filtrd,
  aes(
    x = reorder(Top_tourist_places, -count),
    y = count,
    fill = Type_of_Listing
  )
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    mapping = aes(label = count, group = Type_of_Listing),
    position = position_dodge(width = 0.9),
    hjust = 0.5,
    vjust = 0,
    size = 3
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(
    subtitle = "Tourist place with maximum no of nearest airbnb listing",
    y = "Cont of listing",
    x = "Tourist Places"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.6))
plot2

#cost variation with tourist place

# Dodged bar chart of the average price per tourist place and room type, with
# the rounded average printed above each bar.
#
# Args:
#   avg_prices: data.frame with the columns Tourist_place, Room_Type and
#               Avg_Price.
#   plot_title: title of the chart.
# Returns:
#   A ggplot object.
plot_avg_price_by_place <- function(avg_prices, plot_title) {
  ggplot(data = avg_prices) +
    geom_bar(
      mapping = aes(
        x = reorder(Tourist_place, -Avg_Price),
        y = Avg_Price,
        fill = Room_Type
      ),
      stat = "identity",
      position = position_dodge()
    ) +
    # geom_text() has no `fill` aesthetic; `group` is what lets the labels be
    # dodged alongside their bars.
    geom_text(
      mapping = aes(
        x = reorder(Tourist_place, -Avg_Price),
        y = round(Avg_Price, 2),
        group = Room_Type,
        label = round(Avg_Price)
      ),
      position = position_dodge(width = 0.9),
      hjust = 0.5,
      vjust = 0,
      size = 3
    ) +
    labs(title = plot_title, x = "Tourist_place", y = 'Price') +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.6))
}

# Prices arrive as text such as "$1,200.00"; strip the symbols and convert.
total_listing$price <- as.numeric(gsub("[$|,]", "", total_listing$price))

# Average price per tourist place and room type.
# `price` is selected by name rather than by column position, and missing
# prices are skipped so a single NA does not turn a whole group's mean into NA.
cost_airbnb_tourist <- aggregate(
  x = total_listing[, "price"],
  by = list(total_listing$Tourist_place, total_listing$room_type),
  FUN = mean,
  na.rm = TRUE
)
names(cost_airbnb_tourist) <- c("Tourist_place", "Room_Type", "Avg_Price")

cost_airbnb_tourist_10 <- head(cost_airbnb_tourist)

top_places_filtrd_cost <- merge(
  cost_airbnb_tourist,
  top_places_df,
  by.x = "Tourist_place",
  by.y = "top_places"
)

plot3 <- plot_avg_price_by_place(
  top_places_filtrd_cost,
  "cost variation with tourist palce"
)
plot3

# Same averages, additionally split by listing type.
cost_airbnb_tourist <- aggregate(
  x = total_listing[, "price"],
  by = list(
    total_listing$Tourist_place,
    total_listing$room_type,
    total_listing$is_commercial
  ),
  FUN = mean,
  na.rm = TRUE
)
names(cost_airbnb_tourist) <- c(
  "Tourist_place", "Room_Type", "Listing_Type", "Avg_Price"
)

cost_airbnb_tourist_non_com <- dplyr::filter(
  cost_airbnb_tourist,
  Listing_Type == "Household Listing"
)
cost_airbnb_tourist_com <- dplyr::filter(
  cost_airbnb_tourist,
  Listing_Type == "Commercial Listing"
)

top_places_filtrd_cost_com <- merge(
  cost_airbnb_tourist_com,
  top_places_df,
  by.x = "Tourist_place",
  by.y = "top_places"
)
top_places_filtrd_cost_non_com <- merge(
  cost_airbnb_tourist_non_com,
  top_places_df,
  by.x = "Tourist_place",
  by.y = "top_places"
)

plot3_com <- plot_avg_price_by_place(
  top_places_filtrd_cost_com,
  "Airbnb Commercial Listing Cost variation with Tourist palce"
)
plot3_non_com <- plot_avg_price_by_place(
  top_places_filtrd_cost_non_com,
  "Airbnb Non Commercial Listing Cost variation with Tourist palce"
)
plot3_com

plot3_non_com

# Nearest tourist place per listing ----

# Annotate each listing with its nearest tourist place.
#
# listings: data frame of listings, one row per row of dist_mat.
# dist_mat: distance matrix with listings in rows and tourist places in
#           columns (same column order as the rows of `places`).
# places:   data frame with columns tourist_places_list, lon and lat.
# Returns `listings` with Tourist_place, Nearest_tourist_long,
# Nearest_tourist_lat and Min_distance added (or overwritten).
add_nearest_tourist_place <- function(listings, dist_mat, places) {
  # Resolve the nearest column ONCE. max.col() breaks ties at random by
  # default, so calling it separately for name, longitude and latitude could
  # pair the name of one place with the coordinates of another and make the
  # result differ between runs.
  nearest_idx <- max.col(-dist_mat, ties.method = "first")

  listings$Tourist_place <- places$tourist_places_list[nearest_idx]
  listings$Nearest_tourist_long <- places$lon[nearest_idx]
  listings$Nearest_tourist_lat <- places$lat[nearest_idx]

  # Row-wise minimum distance.
  listings$Min_distance <- apply(dist_mat, 1, min)

  listings
}

# Columns kept for the per-place minimum-distance summary.
min_dist_cols <- c("id", "name", "Tourist_place", "Min_distance", "is_commercial")

#min distance for commercial
total_listing_com <- dplyr::filter(
  total_listing,
  is_commercial == "Commercial Listing"
)

mat_com <- distm(
  total_listing_com[, c("longitude", "latitude")],
  tourist_places_list_main[, c("lon", "lat")],
  fun = distVincentyEllipsoid
)

total_listing_com <- add_nearest_tourist_place(
  total_listing_com, mat_com, tourist_places_list_main
)

total_listing_com_newdf <- total_listing_com[, min_dist_cols]

#names(total_listing_com_newdf)[1]<-"non_com_id"

#names(total_listing_com_newdf)[2]<-"non_com_name"

#names(total_listing_com_newdf)[4]<-"non_com_Min_distance"

#min distance for non commercial
total_listing_noncom <- dplyr::filter(
  total_listing,
  is_commercial == "Household Listing"
)

mat_noncom <- distm(
  total_listing_noncom[, c("longitude", "latitude")],
  tourist_places_list_main[, c("lon", "lat")],
  fun = distVincentyEllipsoid
)

total_listing_noncom <- add_nearest_tourist_place(
  total_listing_noncom, mat_noncom, tourist_places_list_main
)

total_listing_noncom_newdf <- total_listing_noncom[, min_dist_cols]


# Stack both listing types into one long table.
total_listing_min_dist <- rbind(total_listing_com_newdf, total_listing_noncom_newdf)

#merge(total_listing_com_newdf,total_listing_noncom_newdf,by="Tourist_place")

#merge(total_listing_min_dist,tourist_places_list_main,by.x=Tourist_place,by.y=Tourist_place)

# Smallest listing-to-place distance per tourist place and listing type.
# The distance column is selected by name rather than by position so a change
# in column order cannot silently aggregate the wrong column.
total_listing_min_dist_df <- aggregate(
  total_listing_min_dist$Min_distance,
  list(
    total_listing_min_dist$Tourist_place,
    total_listing_min_dist$is_commercial
  ),
  min
)

names(total_listing_min_dist_df)[1:3] <- c(
  "Tourist_Place", "Listing_Type", "Minimum_Distance"
)

# Inner join: keep only the tourist places listed in top_places_df.
total_listing_min_dist_df_mg <- merge(
  total_listing_min_dist_df,
  top_places_df,
  by.x = "Tourist_Place",
  by.y = "top_places"
)

# Dodged bars per listing type, with the rounded distance printed on each bar.
# The text layer is dodged by `group` (geom_text has no `fill` aesthetic),
# which aligns the labels with the bars without an "unknown aesthetics" warning.
plot4 <- ggplot(
  data = total_listing_min_dist_df_mg,
  mapping = aes(
    x = reorder(Tourist_Place, -Minimum_Distance),
    y = Minimum_Distance
  )
) +
  geom_bar(
    mapping = aes(fill = Listing_Type),
    stat = "identity",
    position = position_dodge()
  ) +
  geom_text(
    mapping = aes(
      y = round(Minimum_Distance, 2),
      label = round(Minimum_Distance),
      group = Listing_Type
    ),
    position = position_dodge(width = 0.9),
    hjust = 0.5,
    vjust = 0,
    size = 3
  ) +
  labs(
    title = "Nearest Airbnb to the tourist place",
    x = "Tourist_place",
    y = "Nearest Distance"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.6))
plot4

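#Non commercial is closer than household
# NOTE(review): the code above treats "Household Listing" as the non-commercial
# type, so this comparison names the same group twice -- confirm which listing
# type is actually closer and reword.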

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.